# THIS SCRIPT CODES AND CLEANS DATA FROM THE 2020 CES (COOPERATIVE ELECTION
# STUDY) COMMON CONTENT FILE. IT NEEDS TO BE USED IN CONJUNCTION WITH THE FILE
# (AVAILABLE ON DATAVERSE) ENTITLED "CES20_Common_OUTPUT_vv.dta"
library(foreign)
library(car)
library(readstata13)
library(ggplot2)
library(stargazer)
library(grid)
library(gridExtra)
library(survey)
library(dplyr)
library(lemon)
library(ggpubr)
library(reshape2)
#-----
# LOAD THE DATA. A FILE CHOOSER IS USED TO PICK THE INPUT; SELECT
# "CES20_Common_OUTPUT_vv.dta". BOTH FACTOR-GENERATION OPTIONS ARE ENABLED.
our.data = read.dta13(
  file = file.choose(),
  generate.factors = TRUE,
  nonint.factors = TRUE
)
#------
# VALIDATED TURNOUT: 2020 PRESIDENTIAL PRIMARIES
table(our.data[["CL_2020ppvm"]])
# ANY NON-MISSING VALUE OF CL_2020ppvm IS CODED 1; MISSING IS CODED 0
our.data$voted.presprimaries = as.numeric(!is.na(our.data[["CL_2020ppvm"]]))
table(our.data[["voted.presprimaries"]], our.data[["CL_2020ppvm"]])
# CODING: 1 = VALIDATED VOTING RECORD OF PARTICIPATION IN THE PRIMARIES;
# 0 = NOT VALIDATED VOTING RECORD

# STATES THAT DID NOT HOLD PRESIDENTIAL PRIMARIES IN 2020
states.2020 = c(
  "Alaska", "Hawaii", "Iowa", "Kansas",
  "North Dakota", "Nevada", "Wyoming"
)

# PRIMARY TURNOUT, WITH RESPONDENTS FROM THE STATES ABOVE SET TO MISSING
our.data$voted.presprimaries2 = our.data$voted.presprimaries
table(our.data[["voted.presprimaries2"]])
# BLANK OUT ROWS WHOSE inputstate IS ONE OF THE NO-PRIMARY STATES
our.data[our.data$inputstate %in% states.2020, "voted.presprimaries2"] = NA
table(our.data[["voted.presprimaries2"]])

# VALIDATED TURNOUT: 2020 GENERAL ELECTION
table(our.data[["CL_2020gvm"]])
# ANY NON-MISSING VALUE OF CL_2020gvm IS CODED 1; MISSING IS CODED 0
our.data$voted.general = as.numeric(!is.na(our.data[["CL_2020gvm"]]))
table(our.data[["voted.general"]], our.data[["CL_2020gvm"]])
# CODING: 1 = VALIDATED VOTING RECORD IN PRESIDENTIAL ELECTION;
# 0 = NOT VALIDATED VOTING RECORD

# PARTY IDENTIFICATION (pid7)
table(our.data[["pid7"]])
# STEP 1: FOLD NUMERIC CODE 8 INTO THE MIDPOINT (4)
our.data$partisanship = car::recode(as.numeric(our.data[["pid7"]]), "8 = 4")
# STEP 2: SHIFT DOWN BY ONE SO THE SCALE STARTS AT 0
our.data$partisanship = our.data$partisanship - 1
table(our.data[["partisanship"]], our.data[["pid7"]])
# CODING: 0-TO-6 SCALE, 0 = STRONG DEMOCRAT; 6 = STRONG REPUBLICAN;
# 'NOT SURE' IS TREATED AS PURE INDEPENDENT

# REPUBLICAN IDENTIFIERS: PARTISANSHIP BETWEEN 4 AND 6 (MISSING STAYS MISSING)
our.data$republican = with(
  our.data,
  as.numeric(partisanship >= 4 & partisanship <= 6)
)
table(our.data[["republican"]], our.data[["partisanship"]])
# CODING: 1 = REPUBLICAN; 0 = NOT REPUBLICAN

# DEMOCRATIC IDENTIFIERS: PARTISANSHIP BETWEEN 0 AND 2 (MISSING STAYS MISSING)
our.data$democrat = with(
  our.data,
  as.numeric(partisanship >= 0 & partisanship <= 2)
)
table(our.data[["democrat"]], our.data[["partisanship"]])
# CODING: 1 = DEMOCRAT; 0 = NOT DEMOCRAT

# IDEOLOGICAL SELF-PLACEMENT (CC20_340a)
table(our.data[["CC20_340a"]])
# STEP 1: FOLD NUMERIC CODE 8 INTO THE MIDPOINT (4)
our.data$ideology = car::recode(as.numeric(our.data[["CC20_340a"]]), "8 = 4")
# STEP 2: SHIFT DOWN BY ONE SO THE SCALE STARTS AT 0
our.data$ideology = our.data$ideology - 1
table(our.data[["ideology"]], our.data[["CC20_340a"]])
# CODING: 0-TO-6 SCALE, 0 = VERY LIBERAL; 6 = VERY CONSERVATIVE;
# 'NOT SURE' IS TREATED AS MIDDLE OF THE ROAD

# CONSERVATIVES: IDEOLOGY BETWEEN 4 AND 6 (MISSING STAYS MISSING)
our.data$conservative = with(
  our.data,
  as.numeric(ideology >= 4 & ideology <= 6)
)
table(our.data[["conservative"]], our.data[["CC20_340a"]])
# CODING: 1 = CONSERVATIVE; 0 = NOT CONSERVATIVE

# LIBERALS: IDEOLOGY BETWEEN 0 AND 2 (MISSING STAYS MISSING)
our.data$liberal = with(
  our.data,
  as.numeric(ideology >= 0 & ideology <= 2)
)
table(our.data[["liberal"]], our.data[["CC20_340a"]])
# CODING: 1 = LIBERAL; 0 = NOT LIBERAL

# SORTED
# INDICATOR FOR RESPONDENTS WHOSE PARTY ID AND IDEOLOGY LINE UP
# (CONSERVATIVE REPUBLICANS OR LIBERAL DEMOCRATS). EVERYONE STARTS AT 0.
our.data$sorted = rep(0, length(our.data$ideology))
# ROWS WHERE EITHER INDICATOR IS NA ARE SKIPPED BY THESE ASSIGNMENTS
our.data$sorted[our.data$republican==1 & our.data$conservative==1] = 1
table(our.data$sorted)
our.data$sorted[our.data$democrat==1 & our.data$liberal==1] = 1
table(our.data$sorted)
# RESPONDENTS MISSING EITHER INPUT ARE SET TO NA RATHER THAN LEFT AT 0.
# THE PARTY-ID VARIABLE BUILT BY THIS SCRIPT IS `partisanship`; THE PREVIOUS
# REFERENCE TO A NON-EXISTENT `partyID` COLUMN YIELDED A ZERO-LENGTH MASK,
# SO THIS STEP SILENTLY DID NOTHING.
our.data$sorted[is.na(our.data$partisanship) | is.na(our.data$ideology)] = NA
table(our.data$sorted)
table(our.data$sorted[our.data$republican==1], our.data$ideology[our.data$republican==1])
table(our.data$sorted[our.data$democrat==1], our.data$ideology[our.data$democrat==1])
summary(our.data$sorted)
# 1 = CONSERVATIVE REPUBLICAN OR LIBERAL DEMOCRAT (SELF-IDENTIFIED); 0 = ALL OTHERS;
# NA = PARTISANSHIP OR IDEOLOGY MISSING

# PERCEIVED IDEOLOGY OF THE DEMOCRATIC PARTY (CC20_340e)
table(our.data[["CC20_340e"]])
# STEP 1: NUMERIC CODE 8 BECOMES NA; STEP 2: SHIFT DOWN BY ONE TO START AT 0
our.data$demplacement = car::recode(as.numeric(our.data[["CC20_340e"]]), "8 = NA")
our.data$demplacement = our.data$demplacement - 1
table(our.data[["demplacement"]], our.data[["CC20_340e"]])
# CODING: 0-TO-6 SCALE, 0 = VERY LIBERAL; 6 = VERY CONSERVATIVE;
# 'NOT SURE' IS PROVISIONALLY NA

# PERCEIVED IDEOLOGY OF THE REPUBLICAN PARTY (CC20_340f)
table(our.data[["CC20_340f"]])
# STEP 1: NUMERIC CODE 8 BECOMES NA; STEP 2: SHIFT DOWN BY ONE TO START AT 0
our.data$GOPplacement = car::recode(as.numeric(our.data[["CC20_340f"]]), "8 = NA")
our.data$GOPplacement = our.data$GOPplacement - 1
table(our.data[["GOPplacement"]], our.data[["CC20_340f"]])
summary(our.data[["GOPplacement"]])
# CODING: 0-TO-6 SCALE, 0 = VERY LIBERAL; 6 = VERY CONSERVATIVE;
# 'NOT SURE' IS PROVISIONALLY NA

# KNOWLEDGE OF THE PARTIES' POLICY REPUTATIONS
# 1 WHEN BOTH PLACEMENTS FALL ON THE 0-6 SCALE AND THE GOP IS PLACED STRICTLY
# TO THE RIGHT OF THE DEMOCRATS; NA WHEN THE COMPARISON CANNOT BE EVALUATED
our.data$knowsreputations = with(
  our.data,
  as.numeric(
    (demplacement >= 0 & demplacement <= 6) &
      (GOPplacement >= 0 & GOPplacement <= 6) &
      (GOPplacement > demplacement)
  )
)
table(our.data[["knowsreputations"]])
sum(table(our.data[["knowsreputations"]]))
# 'NOT SURE' ON EITHER PLACEMENT ITEM COUNTS AS NOT KNOWING THE REPUTATIONS
our.data$knowsreputations[which(our.data$CC20_340e == "Not sure")] = 0
table(our.data[["knowsreputations"]])
our.data$knowsreputations[which(our.data$CC20_340f == "Not sure")] = 0
table(our.data[["knowsreputations"]])
sum(table(our.data[["knowsreputations"]]))
# CODING:
# 0 = DOESN'T KNOW THAT THE REPUBLICAN PARTY IS MORE CONSERVATIVE THAN THE DEMOCRATIC PARTY;
# 1 = KNOWS THAT THE REPUBLICAN PARTY IS MORE CONSERVATIVE THAN THE DEMOCRATIC PARTY.

# PROGRAMMATIC INDEX: COMBINES `sorted` AND `knowsreputations`
# START WITH EVERYONE MISSING; ROWS WITH NA ON EITHER INPUT STAY MISSING
our.data$programmatic = rep(NA, times = length(our.data[["ideology"]]))
# NEITHER SORTED NOR AWARE OF REPUTATIONS -> 0
our.data$programmatic[which(our.data$knowsreputations == 0 & our.data$sorted == 0)] = 0
table(our.data[["programmatic"]])
# EXACTLY ONE OF THE TWO -> 0.5
our.data$programmatic[which(our.data$knowsreputations == 0 & our.data$sorted == 1)] = 0.5
table(our.data[["programmatic"]])
our.data$programmatic[which(our.data$knowsreputations == 1 & our.data$sorted == 0)] = 0.5
table(our.data[["programmatic"]])
# BOTH SORTED AND AWARE OF REPUTATIONS -> 1
our.data$programmatic[which(our.data$knowsreputations == 1 & our.data$sorted == 1)] = 1
table(our.data[["programmatic"]])
summary(our.data[["programmatic"]])
# CODING: 1 = MATCHED; 0.5 = PARTIALLY MATCHED; 0 = UNMATCHED

# SAVE DATASET 
# write.csv(our.data, "CCES2020_cleaned.csv")
